(**************************************************************************)
(*       ___                                                              *)
(*      ||M||                                                             *)
(*      ||A||       A project by Andrea Asperti                           *)
(*      ||T||                                                             *)
(*      ||I||       Developers:                                           *)
(*      ||T||         The HELM team.                                      *)
(*      ||A||         http://helm.cs.unibo.it                             *)
(*      \   /                                                             *)
(*       \ /        This file is distributed under the terms of the       *)
(*        v         GNU General Public License Version 2                  *)
(*                                                                        *)
(**************************************************************************)

include "re/lang.ma".

(* The type re of regular expressions over an alphabet $S$ is the smallest 
collection of objects generated by the following constructors: *)

inductive re (S: DeqSet) : Type[0] ≝
   z: re S
 | e: re S
 | s: S → re S
 | c: re S → re S → re S
 | o: re S → re S → re S
 | k: re S → re S.

interpretation "re epsilon" 'epsilon = (e ?).
interpretation "re or" 'plus a b = (o ? a b).
interpretation "re cat" 'middot a b = (c ? a b).
interpretation "re star" 'star a = (k ? a).

notation < "a" non associative with precedence 90 for @{ 'ps $a}.
notation > "` term 90 a" non associative with precedence 90 for @{ 'ps $a}.
interpretation "atom" 'ps a = (s ? a).

notation "`∅" non associative with precedence 90 for @{ 'empty }.
interpretation "empty" 'empty = (z ?).

(* The language sem{e} associated with the regular expression e is inductively 
defined by the following function: *)

let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝ 
match r with
[ z ⇒ ∅
| e ⇒ {ϵ}
| s x ⇒ {[x]}
| c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2)
| o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2)
| k r1 ⇒ (in_l ? r1) ^*].

notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}.
interpretation "in_l" 'in_l E = (in_l ? E).
interpretation "in_l mem" 'mem w l = (in_l ? l w).

lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*.
// qed.

(* 
Pointed Regular expressions

We now introduce pointed regular expressions, that are the main tool we shall 
use for the construction of the automaton. 
A pointed regular expression is just a regular expression internally labelled 
with some additional points. Intuitively, points mark the positions inside the 
regular expression which have been reached after reading some prefix of
the input string, or better the positions where the processing of the remaining 
string has to be started. Each pointed expression for $e$ represents a state of 
the {\em deterministic} automaton associated with $e$; since we obviously have 
only a finite number of possible labellings, the number of states of the automaton 
is finite.

Pointed regular expressions provide the tool for an algebraic revisitation of 
McNaughton and Yamada's algorithm for position automata, making the proof of its 
correctness, that is far from trivial, particularly clear and simple. In particular, 
pointed expressions offer an appealing alternative to Brzozowski's derivatives, 
avoiding their weakest point, namely the fact of being forced to quotient derivatives 
w.r.t. a suitable notion of equivalence in order to get a finite number of states 
(that is not essential for recognizing strings, but is crucial for comparing regular 
expressions). 

Our main data structure is the notion of pointed item, that is meant whose purpose
is to encode a set of positions inside a regular expression. 
The idea of formalizing pointers inside a data type by means of a labelled version 
of the data type itself is probably one of the first, major lessons learned in the 
formalization of the metatheory of programming languages. For our purposes, it is 
enough to mark positions preceding individual characters, so we shall have two kinds 
of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)

inductive pitem (S: DeqSet) : Type[0] ≝
   pz: pitem S
 | pe: pitem S
 | ps: S → pitem S
 | pp: S → pitem S
 | pc: pitem S → pitem S → pitem S
 | po: pitem S → pitem S → pitem S
 | pk: pitem S → pitem S.
 
(* A pointed regular expression (pre) is just a pointed item with an additional 
boolean, that must be understood as the possibility to have a trailing point at 
the end of the expression. As we shall see, pointed regular expressions can be 
understood as states of a DFA, and the boolean indicates if
the state is final or not. *)

definition pre ≝ λS.pitem S × bool.

interpretation "pitem star" 'star a = (pk ? a).
interpretation "pitem or" 'plus a b = (po ? a b).
interpretation "pitem cat" 'middot a b = (pc ? a b).
notation < ".a" non associative with precedence 90 for @{ 'pp $a}.
notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}.
interpretation "pitem pp" 'pp a = (pp ? a).
interpretation "pitem ps" 'ps a = (ps ? a).
interpretation "pitem epsilon" 'epsilon = (pe ?).
interpretation "pitem empty" 'empty = (pz ?).

(* The carrier $|i|$ of an item i is the regular expression obtained from i by 
removing all the points. Similarly, the carrier of a pointed regular expression 
is the carrier of its item. *)

let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
 match l with
  [ pz ⇒ `∅
  | pe ⇒ ϵ
  | ps x ⇒ `x
  | pp x ⇒ `x
  | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
  | po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
  | pk E ⇒ (forget ? E)^* ].
 
(* notation < "|term 19 e|" non associative with precedence 70 for @{'forget $e}.*)
interpretation "forget" 'norm a = (forget ? a).

lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|).
// qed.

lemma erase_plus : ∀S.∀i1,i2:pitem S.
  |i1 + i2| = |i1| + |i2|.
// qed.

lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*. 
// qed.

(* 
Comparing items and pres

Items and pres are very concrete datatypes: they can be effectively compared, 
and enumerated. In particular, we can define a boolean equality beqitem and a proof
beqitem_true that it refects propositional equality, enriching the set (pitem S)
to a DeqSet. *)

let rec beqitem S (i1,i2: pitem S) on i1 ≝ 
  match i1 with
  [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false]
  | pe ⇒ match i2 with [ pe ⇒ true | _ ⇒ false]
  | ps y1 ⇒ match i2 with [ ps y2 ⇒ y1==y2 | _ ⇒ false]
  | pp y1 ⇒ match i2 with [ pp y2 ⇒ y1==y2 | _ ⇒ false]
  | po i11 i12 ⇒ match i2 with 
    [ po i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
    | _ ⇒ false]
  | pc i11 i12 ⇒ match i2 with 
    [ pc i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
    | _ ⇒ false]
  | pk i11 ⇒ match i2 with [ pk i21 ⇒ beqitem S i11 i21 | _ ⇒ false]
  ].

lemma beqitem_true: ∀S,i1,i2. iff (beqitem S i1 i2 = true) (i1 = i2). 
#S #i1 elim i1
  [#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
  |#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
  |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
    [>(\P H) // | @(\b (refl …))]
  |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
    [>(\P H) // | @(\b (refl …))]
  |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
   normalize #H destruct 
    [cases (true_or_false (beqitem S i11 i21)) #H1
      [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
      |>H1 in H; normalize #abs @False_ind /2/
      ]
    |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
    ]
  |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
   normalize #H destruct 
    [cases (true_or_false (beqitem S i11 i21)) #H1
      [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
      |>H1 in H; normalize #abs @False_ind /2/
      ]
    |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
    ]
  |#i3 #Hind #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i4] %
   normalize #H destruct 
    [>(proj1 … (Hind i4) H) // |>(proj2 … (Hind i4) (refl …)) //]
  ]
qed. 

definition DeqItem ≝ λS.
  mk_DeqSet (pitem S) (beqitem S) (beqitem_true S).

(* We also add a couple of unification hints to allow the type inference system 
to look at (pitem S) as the carrier of a DeqSet, and at beqitem as if it was the 
equality function of a DeqSet. *)

unification hint  0 ≔ S; 
    X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
(* ---------------------------------------- *) ⊢ 
    pitem S ≡ carr X.
    
unification hint  0 ≔ S,i1,i2; 
    X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
(* ---------------------------------------- *) ⊢ 
    beqitem S i1 i2 ≡ eqb X i1 i2.

(*
Semantics of pointed regular expressions

The intuitive semantic of a point is to mark the position where
we should start reading the regular expression. The language associated
to a pre is the union of the languages associated with its points. *)

let rec in_pl (S : DeqSet) (r : pitem S) on r : word S → Prop ≝ 
match r with
[ pz ⇒ ∅
| pe ⇒ ∅
| ps _ ⇒ ∅
| pp x ⇒ { [x] }
| pc r1 r2 ⇒ (in_pl ? r1) · \sem{forget ? r2} ∪ (in_pl ? r2)
| po r1 r2 ⇒ (in_pl ? r1) ∪ (in_pl ? r2)
| pk r1 ⇒ (in_pl ? r1) · \sem{forget ? r1}^*  ].

interpretation "in_pl" 'in_l E = (in_pl ? E).
interpretation "in_pl mem" 'mem w l = (in_pl ? l w).

definition in_prl ≝ λS : DeqSet.λp:pre S. 
  if (\snd p) then \sem{\fst p} ∪ {ϵ} else \sem{\fst p}.
  
interpretation "in_prl mem" 'mem w l = (in_prl ? l w).
interpretation "in_prl" 'in_l E = (in_prl ? E).

(* The following, trivial lemmas are only meant for rewriting purposes. *)

lemma sem_pre_true : ∀S.∀i:pitem S. 
  \sem{〈i,true〉} = \sem{i} ∪ {ϵ}. 
// qed.

lemma sem_pre_false : ∀S.∀i:pitem S. 
  \sem{〈i,false〉} = \sem{i}. 
// qed.

lemma sem_cat: ∀S.∀i1,i2:pitem S. 
  \sem{i1 · i2} = \sem{i1} · \sem{|i2|} ∪ \sem{i2}.
// qed.

lemma sem_cat_w: ∀S.∀i1,i2:pitem S.∀w.
  \sem{i1 · i2} w = ((\sem{i1} · \sem{|i2|}) w ∨ \sem{i2} w).
// qed.

lemma sem_plus: ∀S.∀i1,i2:pitem S. 
  \sem{i1 + i2} = \sem{i1} ∪ \sem{i2}.
// qed.

lemma sem_plus_w: ∀S.∀i1,i2:pitem S.∀w. 
  \sem{i1 + i2} w = (\sem{i1} w ∨ \sem{i2} w).
// qed.

lemma sem_star : ∀S.∀i:pitem S.
  \sem{i^*} = \sem{i} · \sem{|i|}^*.
// qed.

lemma sem_star_w : ∀S.∀i:pitem S.∀w.
  \sem{i^*} w = (∃w1,w2.w1 @ w2 = w ∧ \sem{i} w1 ∧ \sem{|i|}^* w2).
// qed.

(* Below are a few, simple, semantic properties of items. In particular:
- not_epsilon_item : ∀S:DeqSet.∀i:pitem S. ¬ (\sem{i} ϵ).
- epsilon_pre : ∀S.∀e:pre S. (\sem{i} ϵ) ↔ (\snd e = true).
- minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
- minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
The first property is proved by a simple induction on $i$; the other
results are easy corollaries. We need an auxiliary lemma first. *)

lemma append_eq_nil : ∀S.∀w1,w2:word S. w1 @ w2 = ϵ → w1 = ϵ.
#S #w1 #w2 cases w1 // #a #tl normalize #H destruct qed.

lemma not_epsilon_lp : ∀S:DeqSet.∀e:pitem S. ¬ (ϵ ∈ e).
#S #e elim e normalize /2/  
  [#r1 #r2 * #n1 #n2 % * /2/ * #w1 * #w2 * * #H 
   >(append_eq_nil …H…) /2/
  |#r1 #r2 #n1 #n2 % * /2/
  |#r #n % * #w1 * #w2 * * #H >(append_eq_nil …H…) /2/
  ]
qed.

lemma epsilon_to_true : ∀S.∀e:pre S. ϵ ∈ e → \snd e = true.
#S * #i #b cases b // normalize #H @False_ind /2/ 
qed.

lemma true_to_epsilon : ∀S.∀e:pre S. \snd e = true → ϵ ∈ e.
#S * #i #b #btrue normalize in btrue; >btrue %2 // 
qed.

lemma minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
#S #i #w % 
  [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) //
  |* //
  ]
qed.

lemma minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
#S * #i * 
  [>sem_pre_true normalize in ⊢ (??%?); #w % 
    [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)]
  |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ]
  ]
qed.

(*
Broadcasting points

Intuitively, a regular expression e must be understood as a pointed expression with a single 
point in front of it. Since however we only allow points before symbols, we must broadcast 
this initial point inside e traversing all nullable subexpressions, that essentially corresponds 
to the ϵ-closure operation on automata. We use the notation •(_) to denote such an operation;
its definition is the expected one: let us start discussing an example.

Example
Let us broadcast a point inside (a + ϵ)(b*a + b)b. We start working in parallel on the 
first occurrence of a (where the point stops), and on ϵ that gets traversed. We have hence 
reached the end of a + ϵ and we must pursue broadcasting inside (b*a + b)b. Again, we work in 
parallel on the two additive subterms b^*a and b; the first point is allowed to both enter the 
star, and to traverse it, stopping in front of a; the second point just stops in front of b. 
No point reached that end of b^*a + b hence no further propagation is possible. In conclusion: 
               •((a + ϵ)(b^*a + b)b) = 〈(•a + ϵ)((•b)^*•a + •b)b, false〉
*)

(* Broadcasting a point inside an item generates a pre, since the point could possibly reach 
the end of the expression. 
Broadcasting inside a i1+i2 amounts to broadcast in parallel inside i1 and i2.
If we define
                 〈i1,b1〉 ⊕ 〈i2,b2〉 = 〈i1 + i2, b1 ∨ b2〉
then, we just have •(i1+i2) = •(i1)⊕ •(i2).
*)

definition lo ≝ λS:DeqSet.λa,b:pre S.〈\fst a + \fst b,\snd a ∨ \snd b〉.
notation "a ⊕ b" left associative with precedence 65 for @{'oplus $a $b}.
interpretation "oplus" 'oplus a b = (lo ? a b).

lemma lo_def: ∀S.∀i1,i2:pitem S.∀b1,b2. 〈i1,b1〉⊕〈i2,b2〉=〈i1+i2,b1∨b2〉.
// qed.

(*
Concatenation is a bit more complex. In order to broadcast a point inside i1 · i2 
we should start broadcasting it inside i1 and then proceed into i2 if and only if a 
point reached the end of i1. This suggests to define •(i1 · i2) as •(i1) ▹ i2, where 
e ▹ i is a general operation of concatenation between a pre and an item, defined by 
cases on the boolean in e: 

       〈i1,true〉 ▹ i2  = i1 ◃ •(i_2)
       〈i1,false〉 ▹ i2 = i1 · i2
       
In turn, ◃ says how to concatenate an item with a pre, that is however extremely simple:

        i1 ◃ 〈i1,b〉  = 〈i_1 · i2, b〉

Let us come to the formalized definitions:
*)

definition pre_concat_r ≝ λS:DeqSet.λi:pitem S.λe:pre S.
  match e with [ mk_Prod i1 b ⇒ 〈i · i1, b〉].
 
notation "i ◃ e" left associative with precedence 65 for @{'lhd $i $e}.
interpretation "pre_concat_r" 'lhd i e = (pre_concat_r ? i e).

(* The behaviour of ◃ is summarized by the following, easy lemma: *)

lemma eq_to_ex_eq: ∀S.∀A,B:word S → Prop. 
  A = B → A ≐ B. 
#S #A #B #H >H /2/ qed.

lemma sem_pre_concat_r : ∀S,i.∀e:pre S.
  \sem{i ◃ e} ≐ \sem{i} · \sem{|\fst e|} ∪ \sem{e}.
#S #i * #i1 #b1 cases b1 [2: @eq_to_ex_eq //] 
>sem_pre_true >sem_cat >sem_pre_true /2/ 
qed.

(* The definition of $•(-)$ (eclose) and ▹ (pre_concat_l) are mutually recursive.
In this situation, a viable alternative that is usually simpler to reason about, 
is to abstract one of the two functions with respect to the other. In particular
we abstract pre_concat_l with respect to an input bcast function from items to
pres. *)

definition pre_concat_l ≝ λS:DeqSet.λbcast:∀S:DeqSet.pitem S → pre S.λe1:pre S.λi2:pitem S.
  match e1 with 
  [ mk_Prod i1 b1 ⇒ match b1 with 
    [ true ⇒ (i1 ◃ (bcast ? i2)) 
    | false ⇒ 〈i1 · i2,false〉
    ]
  ].

notation "a ▹ b" left associative with precedence 65 for @{'tril eclose $a $b}.
interpretation "item-pre concat" 'tril op a b = (pre_concat_l ? op a b).

notation "•" non associative with precedence 65 for @{eclose ?}.

(* We are ready to give the formal definition of the broadcasting operation. *)

let rec eclose (S: DeqSet) (i: pitem S) on i : pre S ≝
 match i with
  [ pz ⇒ 〈 `∅, false 〉
  | pe ⇒ 〈 ϵ,  true 〉
  | ps x ⇒ 〈 `.x, false〉
  | pp x ⇒ 〈 `.x, false 〉
  | po i1 i2 ⇒ •i1 ⊕ •i2
  | pc i1 i2 ⇒ •i1 ▹ i2
  | pk i ⇒ 〈(\fst (•i))^*,true〉].
  
notation "• x" non associative with precedence 65 for @{'eclose $x}.
interpretation "eclose" 'eclose x = (eclose ? x).

(* Here are a few simple properties of ▹ and •(-) *)

lemma eclose_plus: ∀S:DeqSet.∀i1,i2:pitem S.
  •(i1 + i2) = •i1 ⊕ •i2.
// qed.

lemma eclose_dot: ∀S:DeqSet.∀i1,i2:pitem S.
  •(i1 · i2) = •i1 ▹ i2.
// qed.

lemma eclose_star: ∀S:DeqSet.∀i:pitem S.
  •i^* = 〈(\fst(•i))^*,true〉.
// qed.

lemma odot_true : 
  ∀S.∀i1,i2:pitem S.
  〈i1,true〉 ▹ i2 = i1 ◃ (•i2).
// qed.

lemma odot_true_bis : 
  ∀S.∀i1,i2:pitem S.
  〈i1,true〉 ▹ i2 = 〈i1 · \fst (•i2), \snd (•i2)〉.
#S #i1 #i2 normalize cases (•i2) // qed.

lemma odot_false: 
  ∀S.∀i1,i2:pitem S.
  〈i1,false〉 ▹ i2 = 〈i1 · i2, false〉.
// qed.

(* The definition of •(-) (eclose) can then be lifted from items to pres
in the obvious way. *)

definition lift ≝ λS.λf:pitem S →pre S.λe:pre S. 
  match e with 
  [ mk_Prod i b ⇒ 〈\fst (f i), \snd (f i) ∨ b〉].
  
definition preclose ≝ λS. lift S (eclose S). 
interpretation "preclose" 'eclose x = (preclose ? x).

(* Obviously, broadcasting does not change the carrier of the item,
as it is easily proved by structural induction. *)

lemma erase_bull : ∀S.∀i:pitem S. |\fst (•i)| = |i|.
#S #i elim i // 
  [ #i1 #i2 #IH1 #IH2 >erase_dot <IH1 >eclose_dot
    cases (•i1) #i11 #b1 cases b1 // <IH2 >odot_true_bis //
  | #i1 #i2 #IH1 #IH2 >eclose_plus >(erase_plus … i1) <IH1 <IH2
    cases (•i1) #i11 #b1 cases (•i2) #i21 #b2 //  
  | #i #IH >eclose_star >(erase_star … i) <IH cases (•i) //
  ]
qed.

(* We are now ready to state the main semantic properties of ⊕, ◃ and •(-):

sem_oplus:     \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2} 
sem_pcl:       \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}
sem_bullet     \sem{•i} ≐ \sem{i} ∪ \sem{|i|}

The proof of sem_oplus is straightforward. *)

lemma sem_oplus: ∀S:DeqSet.∀e1,e2:pre S.
  \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}. 
#S * #i1 #b1 * #i2 #b2 #w %
  [cases b1 cases b2 normalize /2/ * /3/ * /3/
  |cases b1 cases b2 normalize /2/ * /3/ * /3/
  ]
qed.

(* For the others, we proceed as follow: we first prove the following 
auxiliary lemma, that assumes sem_bullet:

sem_pcl_aux: 
   \sem{•i2} ≐  \sem{i2} ∪ \sem{|i2|} →
   \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}.

Then, using the previous result, we prove sem_bullet by induction 
on i. Finally, sem_pcl_aux and sem_bullet give sem_pcl. *)

lemma LcatE : ∀S.∀e1,e2:pitem S.
  \sem{e1 · e2} = \sem{e1} · \sem{|e2|} ∪ \sem{e2}. 
// qed.

lemma odot_dot_aux : ∀S.∀e1:pre S.∀i2:pitem S.
   \sem{•i2} ≐  \sem{i2} ∪ \sem{|i2|} →
   \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
#S * #i1 #b1 #i2 cases b1
  [2:#th >odot_false >sem_pre_false >sem_pre_false >sem_cat /2/
  |#H >odot_true >sem_pre_true @(eqP_trans … (sem_pre_concat_r …))
   >erase_bull @eqP_trans [|@(eqP_union_l … H)]
    @eqP_trans [|@eqP_union_l[|@union_comm ]]
    @eqP_trans [|@eqP_sym @union_assoc ] /3/ 
  ]
qed.
  
lemma minus_eps_pre_aux: ∀S.∀e:pre S.∀i:pitem S.∀A. 
 \sem{e} ≐ \sem{i} ∪ A → \sem{\fst e} ≐ \sem{i} ∪ (A - {[ ]}).
#S #e #i #A #seme
@eqP_trans [|@minus_eps_pre]
@eqP_trans [||@eqP_union_r [|@eqP_sym @minus_eps_item]]
@eqP_trans [||@distribute_substract] 
@eqP_substract_r //
qed.

theorem sem_bull: ∀S:DeqSet. ∀i:pitem S.  \sem{•i} ≐ \sem{i} ∪ \sem{|i|}.
#S #e elim e 
  [#w normalize % [/2/ | * //]
  |/2/ 
  |#x normalize #w % [ /2/ | * [@False_ind | //]]
  |#x normalize #w % [ /2/ | * // ] 
  |#i1 #i2 #IH1 #IH2 
   (* lhs = \sem{•(i1 ·i2)} *)
   >eclose_dot
   (* lhs =\sem{•(i1) ▹ i2)} *) 
   @eqP_trans [|@odot_dot_aux //] 
   (* lhs = \sem{•(i1)·\sem{|i2|}∪\sem{i2} *)
   @eqP_trans
     [|@eqP_union_r
       [|@eqP_trans [|@(cat_ext_l … IH1)] @distr_cat_r]]
   (* lhs = \sem{i1}·\sem{|i2|}∪\sem{|i1|}·\sem{|i2|}∪\sem{i2} *) 
   @eqP_trans [|@union_assoc]
   (* lhs = \sem{i1}·\sem{|i2|}∪(\sem{|i1|}·\sem{|i2|}∪\sem{i2}) *) 
   (* Now we work on the rhs that is 
      rhs = \sem{i1·i2} ∪ \sem{|i1·i2|} *)
   >sem_cat 
   (* rhs = \sem{i1}·\sem{|i2|} ∪ \sem{i2} ∪ \sem{|i1·i2|} *)
   @eqP_trans [||@eqP_sym @union_assoc]
   (* rhs = \sem{i1}·\sem{|i2|}∪ (\sem{i2} ∪ \sem{|i1·i2|}) *)
   @eqP_union_l @union_comm 
  |#i1 #i2 #IH1 #IH2 >eclose_plus
   @eqP_trans [|@sem_oplus] >sem_plus >erase_plus 
   @eqP_trans [|@(eqP_union_l … IH2)]
   @eqP_trans [|@eqP_sym @union_assoc]
   @eqP_trans [||@union_assoc] @eqP_union_r
   @eqP_trans [||@eqP_sym @union_assoc]
   @eqP_trans [||@eqP_union_l [|@union_comm]]
   @eqP_trans [||@union_assoc] /2/
  |#i #H >sem_pre_true >sem_star >erase_bull >sem_star
   @eqP_trans [|@eqP_union_r [|@cat_ext_l [|@minus_eps_pre_aux //]]]
   @eqP_trans [|@eqP_union_r [|@distr_cat_r]]
   @eqP_trans [|@union_assoc] @eqP_union_l >erase_star 
   @eqP_sym @star_fix_eps 
  ]
qed.

(*
Blank item
 
As a corollary of theorem sem_bullet, given a regular expression e, we can easily 
find an item with the same semantics of $e$: it is enough to get an item (blank e) 
having e as carrier and no point, and then broadcast a point in it. The semantics of
(blank e) is obviously the empty language: from the point of view of the automaton,
it corresponds with the pit state. *)

let rec blank (S: DeqSet) (i: re S) on i :pitem S ≝
 match i with
  [ z ⇒ `∅
  | e ⇒ ϵ
  | s y ⇒ `y
  | o e1 e2 ⇒ (blank S e1) + (blank S e2) 
  | c e1 e2 ⇒ (blank S e1) · (blank S e2)
  | k e ⇒ (blank S e)^* ].
  
lemma forget_blank: ∀S.∀e:re S.|blank S e| = e.
#S #e elim e normalize //
qed.

lemma sem_blank: ∀S.∀e:re S.\sem{blank S e} ≐ ∅.
#S #e elim e 
  [1,2:@eq_to_ex_eq // 
  |#s @eq_to_ex_eq //
  |#e1 #e2 #Hind1 #Hind2 >sem_cat 
   @eqP_trans [||@(union_empty_r … ∅)] 
   @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r
   @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind1
  |#e1 #e2 #Hind1 #Hind2 >sem_plus 
   @eqP_trans [||@(union_empty_r … ∅)] 
   @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r @Hind1
  |#e #Hind >sem_star
   @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind
  ]
qed.
   
theorem re_embedding: ∀S.∀e:re S. 
  \sem{•(blank S e)} ≐ \sem{e}.
#S #e @eqP_trans [|@sem_bull] >forget_blank 
@eqP_trans [|@eqP_union_r [|@sem_blank]]
@eqP_trans [|@union_comm] @union_empty_r.
qed.

(*
Lifted Operators
 
Plus and bullet have been already lifted from items to pres. We can now 
do a similar job for concatenation ⊙ and Kleene's star ⊛. *)

definition lifted_cat ≝ λS:DeqSet.λe:pre S. 
  lift S (pre_concat_l S eclose e).

notation "e1 ⊙ e2" left associative with precedence 70 for @{'odot $e1 $e2}.

interpretation "lifted cat" 'odot e1 e2 = (lifted_cat ? e1 e2).

lemma odot_true_b : ∀S.∀i1,i2:pitem S.∀b. 
  〈i1,true〉 ⊙ 〈i2,b〉 = 〈i1 · (\fst (•i2)),\snd (•i2) ∨ b〉.
#S #i1 #i2 #b normalize in ⊢ (??%?); cases (•i2) // 
qed.

lemma odot_false_b : ∀S.∀i1,i2:pitem S.∀b.
  〈i1,false〉 ⊙ 〈i2,b〉 = 〈i1 · i2 ,b〉.
// 
qed.
  
lemma erase_odot:∀S.∀e1,e2:pre S.
  |\fst (e1 ⊙ e2)| = |\fst e1| · (|\fst e2|).
#S * #i1 * * #i2 #b2 // >odot_true_b // 
qed.

(* Let us come to the star operation: *)

definition lk ≝ λS:DeqSet.λe:pre S.
  match e with 
  [ mk_Prod i1 b1 ⇒
    match b1 with 
    [true ⇒ 〈(\fst (eclose ? i1))^*, true〉
    |false ⇒ 〈i1^*,false〉
    ]
  ]. 

(* notation < "a \sup ⊛" non associative with precedence 90 for @{'lk $a}.*)
interpretation "lk" 'lk a = (lk ? a).
notation "a^⊛" non associative with precedence 90 for @{'lk $a}.


lemma ostar_true: ∀S.∀i:pitem S.
  〈i,true〉^⊛ = 〈(\fst (•i))^*, true〉.
// qed.

lemma ostar_false: ∀S.∀i:pitem S.
  〈i,false〉^⊛ = 〈i^*, false〉.
// qed.
  
lemma erase_ostar: ∀S.∀e:pre S.
  |\fst (e^⊛)| = |\fst e|^*.
#S * #i * // qed.

lemma sem_odot_true: ∀S:DeqSet.∀e1:pre S.∀i. 
  \sem{e1 ⊙ 〈i,true〉} ≐ \sem{e1 ▹ i} ∪ { [ ] }.
#S #e1 #i 
cut (e1 ⊙ 〈i,true〉 = 〈\fst (e1 ▹ i), \snd(e1 ▹ i) ∨ true〉) [//]
#H >H cases (e1 ▹ i) #i1 #b1 cases b1 
  [>sem_pre_true @eqP_trans [||@eqP_sym @union_assoc]
   @eqP_union_l /2/ 
  |/2/
  ]
qed.

lemma eq_odot_false: ∀S:DeqSet.∀e1:pre S.∀i. 
  e1 ⊙ 〈i,false〉 = e1 ▹ i.
#S #e1 #i  
cut (e1 ⊙ 〈i,false〉 = 〈\fst (e1 ▹ i), \snd(e1 ▹ i) ∨ false〉) [//]
cases (e1 ▹ i) #i1 #b1 cases b1 #H @H
qed.

(* We conclude this section with the proof of the main semantic properties
of ⊙ and ⊛. *)

lemma sem_odot: 
  ∀S.∀e1,e2: pre S. \sem{e1 ⊙ e2} ≐ \sem{e1}· \sem{|\fst e2|} ∪ \sem{e2}.
#S #e1 * #i2 * 
  [>sem_pre_true 
   @eqP_trans [|@sem_odot_true]
   @eqP_trans [||@union_assoc] @eqP_union_r @odot_dot_aux //
  |>sem_pre_false >eq_odot_false @odot_dot_aux //
  ]
qed.
      
theorem sem_ostar: ∀S.∀e:pre S. 
  \sem{e^⊛} ≐  \sem{e} · \sem{|\fst e|}^*.
#S * #i #b cases b
  [(* lhs = \sem{〈i,true〉^⊛} *)
   >sem_pre_true (* >sem_pre_true *) 
   (* lhs = \sem{(\fst (•i))^*}∪{ϵ} *)
   >sem_star >erase_bull
   (* lhs = \sem{\fst (•i)}·(\sem{|i|)^*∪{ϵ} *)
   @eqP_trans [|@eqP_union_r[|@cat_ext_l [|@minus_eps_pre_aux //]]]
   (* lhs = (\sem{i}∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
   @eqP_trans [|@eqP_union_r [|@distr_cat_r]]
   (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
   @eqP_trans [|@union_assoc]
   (* lhs = (\sem{i}·(\sem{|i|)^*∪((\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ}) *)
   @eqP_trans [|@eqP_union_l[|@eqP_sym @star_fix_eps]]
   (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|)^* *)
   (* now we work on the right hand side, that is
      rhs = \sem{〈i,true〉}·(\sem{|i|}^* *)
   @eqP_trans [||@eqP_sym @distr_cat_r]
   (* rhs = (\sem{i}·(\sem{|i|)^*∪{ϵ}·(\sem{|i|)^* *)
   @eqP_union_l @eqP_sym @epsilon_cat_l
  |>sem_pre_false >sem_pre_false >sem_star /2/
  ]
qed.

